
# ----------------------------------------
#
# Replication files
# Catalinac, Amy 
# Dominance Through Division: Group-based Clientelism in Japan
#-----------------------------------------






#-----------------------------------------
# CHAPTER 7
#
# #-----------------------------------------






#-----------------------------------------
# PREPARATION

setwd("C:/Users/ac6037/Dropbox/TARGETING/replication2018/ANALYSIS OF MASTER DATA")
options(max.print=1000000)

dat <- readRDS("Master_plus_Snow_Turn_Trans_Dis6.rds")
elec.dat <- dat[!is.na(dat$hor_electoral_district),]

library(dplyr)
library(readstata13)
library(stargazer)
library(lmtest)
library(plm)
library(clubSandwich)






# ------------------------------------------------
# This function conducts regressions on panel data, 
# allowing different FE to be specified and different clustering of SEs,
# and adjusts clustering so that it clusters the way Stata does.
#
# For consistency with results in "A Tournament Theory of Pork Barrel Politics:
# The Case of Japan", Comparative Political Studies, 2020.
#
# ------------------------------------------------

stata_plm <- function(formula, data,
                      model = "pooling", effect = "individual",
                      cluster){ # NB: cluster variable has to be a character string specifying the name of the variable we want to
  # cluster on.  It also must be in either of the indices of pdata.frame.
  
  # Plus, we will build in sanity checks: first, is the "cluster" argument a character string?
  if(class(cluster) != "character"){
    cat("Error: argument 'cluster' must be a character string that specifies the name of the cluster variable.")
    
    # Second, is "cluster" argument in either one of the indices of the pdata.frame?
  } else if(!(cluster %in% names(index(data)))){
    cat("Error: clustering variable must be either of indices of the pdata.frame")
  } else{
    
    # Use the cluster argument to specify the "cluster" argument of vcovHC ( which must be either of "group" and "time"):
    if(which(cluster == names(index(data))) == 1){
      cluster_index <- "group"
    } else if (which(cluster == names(index(data))) == 2){
      cluster_index <- "time"
    } else{
      # Finally, just to make sure the clustering variable is in the indices:
      cat("Error: clustering variable not found in the indices.")
    }
    
    # Estimate the regression with plm:
    reg <- plm(formula, data, model = model, effect = effect)
    # Adjust the clustered SEs the way Stata does them:
    len_clst <- length(unique(data[,cluster]))
    vcov <- vcovHC(reg, type = "HC1", method = "arellano",
                   cluster = cluster_index) * (len_clst/(len_clst-1))
    reg$vcov <- vcov
    return(reg)
  }
}








# --------------------------------------------------------------
# Make a dummy variable for whether district has a senior LDP winner

sum(is.na(elec.dat$numLDPwinsen))
elec.dat$ldpsenior <- ifelse(elec.dat$numLDPwinsen!=0, 1, 0)
table(elec.dat$ldpsenior)
class(elec.dat$ldpsenior)









# ----------------------------------------------
# TABLE A.8
#
# ----------------------------------------------

# descriptive statistics for Chapter 7

# First, lets create a district-level data set from the municipality-level dataset
# used in Chapters 6 and 8.

# All municipality-level observations have district-level variables attached.
# "last" is populated for one municipality per electoral district.
# Subsetting the data frame to last==1 will create a district-specific data set.

# Keep to tournament-possible districts with LDP winners.

# Variable capturing (non-logged) per capita NTD electoral districts received after 
# elections (FDISngaidpc) has zeroes in it (mostly due to being in a tournament-impossible
# district and ineligible for NTD), so taking log of this produces -Inf.  
# Restrict to larger numbers.

dist_ss <- subset(elec.dat, elec.dat$tmt_possible==1 & elec.dat$LDPseats!=0 
                  & elec.dat$logFDISngaidpc > -10000
                  & elec.dat$last==1) 

length(unique(dist_ss$district_year))

dist_ss2 <- dist_ss[,c("logFDISngaidpc", 
                     "DISceif", "DISprimary", "DISneedy", "DISdensity", "logDISpop", "logDISincomepc",
                     "HI", "totseat_in_electoral_district",
                     "DISmal", "logNum", "DisLDPVS2", "DisWinLDPVS2", "LDPseats", "ldpsenior", "numLDPcand")]

stargazer(dist_ss2)







# -------------------------------------------
# FIGURE 7.1
# \label{HIfig}
# -------------------------------------------

HIbefER <- subset(elec.dat, elec.dat$year < 1995 &  
                    elec.dat$tmt_possible==1 & 
                    elec.dat$last == 1 & 
                    elec.dat$LDPseats!=0)$HI

HIbet <- subset(elec.dat, elec.dat$year >= 1995 &
                  elec.dat$tmt_possible==1 &
                  elec.dat$last == 1 &
                  elec.dat$LDPseats!=0)
HIERMM <- subset(HIbet, HIbet$year<2005)$HI

HIaftMM <- subset(elec.dat, elec.dat$year >= 2005 &
                    elec.dat$tmt_possible==1 &
                    elec.dat$last == 1 &
                    elec.dat$LDPseats!=0)$HI

# Mean prior to ER
mean(HIbefER)
summary(na.omit(HIbefER))

# Mean after ER, prior to MM
mean(na.omit(HIERMM))
summary(na.omit(HIERMM))

# Mean after ER, after cutting number of munis
mean(na.omit(HIaftMM))
summary(na.omit(HIaftMM))

t.test(HIbefER, HIERMM)
t.test(HIERMM, na.omit(HIaftMM))
# means are the same for latter two periods

par(mfrow = c(1, 1), mar = c(4,4,2,1), tcl = -0.25, mgp = c(1.75, 0.6, 0),
    font.main = 1, cex.main = 2)

plot(density(na.omit(HIbefER), from = 0, to = .6), xlim = c(0,.6), ylim = c(0,8),
     xlab = "", ylab = "", main = "", cex.axis = 1.7, lwd=3)

legend(x=.2, y=2, legend = "Before Electoral Reform (1980-1994)", bty = "n", cex=2)

par(new=T)
plot(density(na.omit(HIERMM), from = 0, to = .6), xlim = c(0,.6), ylim = c(0,8),
     xlab = "", ylab = "",
     main = "", cex.axis = 1.7, lty=2, col = "grey27", lwd=3)
legend(x=.12, y=3, legend = "After Electoral Reform, Pre-Municipal Mergers (1995-2004)", bty = "n", text.col = "grey27", cex=2)

par(new=T)
plot(density(na.omit(HIaftMM), from = 0, to = .6), xlim = c(0,.6), ylim = c(0,8),
     xlab = "Concentration of Voting Population in Electoral Districts", ylab = "Density",
     main = "", cex.axis = 1.7, lty=3, col = "grey46", lwd=3, cex.lab = 2)
legend(x=.1, y=4, legend = "After Municipal Mergers (2005-)", bty = "n", text.col = "grey46", cex=2)

# for reference, specs are: 1350 x 1000


















# ----------------------------------------------------
# TABLE 7.1 
#
# \ref{bet_dist_T1} 
# ----------------------------------------------------

controlsd <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity"

controlsdall <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity + 
                logNum + DISmal + DisWinLDPVS2 + ldpsenior"

pre.red <- subset(elec.dat,
                    elec.dat$last==1 &
                    elec.dat$logFDISngaidpc > -10000 &
                    elec.dat$tmt_possible==1 &
                    elec.dat$LDPseats!=0)

pre.red.pdf <- pdata.frame(pre.red, index = c("district_reform2", "year"))

# NB:
# district_reform2 is an indicator variable for electoral district that is specific to 
# electoral system. 0_101 is Hokkaido First District prior to 1994, while 
# 1_101 is Hokkaido First District after 1994.  This way, we differentiate them.
# Note that it was made with JHRED kucoder, which takes border changes due to 
# redistricting into account.

pre.red.pdf$ldpsenior <- as.factor(pre.red.pdf$ldpsenior)

mod1 <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                   pre.red.pdf, model = "within", effect = "time",
                   cluster = "district_reform2")
summary(mod1)

# Read in file with electoral districts comprising the capital cities of
# each prefecture:
prefcap <- read.csv("pref_capitals.csv")

# Exclude pref capital disticts from data frame:
pre.red.pdfn <- subset(pre.red.pdf, !pre.red.pdf$hor_electoral_district %in% prefcap$kucode)
length(unique(pre.red.pdfn$district_year))

mod1nop <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                    pre.red.pdfn, model = "within", effect = "time",
                    cluster = "district_reform2")
summary(mod1nop)

# ----------------------------
# print table

stargazer(mod1, mod1nop,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)


# ----------------------------
# Substantive effects of Model 1:

newdat <- mod1$model

# which coef is HI?
mod1$coefficients

# effect of a 1 SD increase in HI:
mod1$coefficients[1]*sd(na.omit(newdat$HI))

# Exponentiate (as log DV):
exp(mod1$coefficients[1]*sd(na.omit(newdat$HI)))

# Express as percentage (as log DV):
(exp(mod1$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100

# sample mean:
mean(na.omit(newdat$logFDISngaidpc))

# Exponentiate this:
exp(mean(na.omit(newdat$logFDISngaidpc)))

# Multiply by units for raw DV (hyaku man en)
exp(mean(na.omit(newdat$logFDISngaidpc)))*1000000
sample_mean <- exp(mean(na.omit(newdat$logFDISngaidpc)))*1000000

# Increase over sample mean:
(((exp(mod1$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100)/100)*sample_mean
increase_over_sm <- (((exp(mod1$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100)/100)*sample_mean
increase_over_sm

exp(mean(newdat$logDISpop))

# How much extra money muni gets 
exp(mean(newdat$logDISpop))*increase_over_sm








# -------------------
# Comparing HI in prefectural capital electoral districts with all others

# HI in pref capitals
check <- subset(pre.red.pdf, pre.red.pdf$hor_electoral_district %in% prefcap$kucode)
mean(na.omit(check$HI))
mean(na.omit(check$num))

# HI in nonpref capitals
check2 <- subset(pre.red.pdf, !(pre.red.pdf$hor_electoral_district %in% prefcap$kucode))
mean(na.omit(check2$HI))
mean(na.omit(check2$num))

t.test(na.omit(check$HI), na.omit(check2$HI))
t.test(na.omit(check$num), na.omit(check2$num))



# -----------------------------------------
# Substantive effects of Model 2

newdat <- mod1nop$model

# which coef is HI?
mod1nop$coefficients

# effect of a 1 SD increase in HI:
mod1nop$coefficients[1]*sd(na.omit(newdat$HI))

# Exponentiate (as log DV):
exp(mod1nop$coefficients[1]*sd(na.omit(newdat$HI)))

# Express as percentage (as log DV):
(exp(mod1nop$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100

# sample mean:
mean(na.omit(newdat$logFDISngaidpc))

# Exponentiate this:
exp(mean(na.omit(newdat$logFDISngaidpc)))

# Multiply by units for raw DV (hyaku man en)
exp(mean(na.omit(newdat$logFDISngaidpc)))*1000000
sample_mean <- exp(mean(na.omit(newdat$logFDISngaidpc)))*1000000

# Increase over sample mean:
(((exp(mod1nop$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100)/100)*sample_mean
increase_over_sm <- (((exp(mod1nop$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100)/100)*sample_mean
increase_over_sm

exp(mean(newdat$logDISpop))

# How much extra money district gets 
exp(mean(newdat$logDISpop))*increase_over_sm










# --------------------------------------------------------------
# Table A.9 
# \ref{bet_dist_T1_alt}

# --------------------------------------------------------------

# Replace DisWinLDPVS2 with LDPseats

controlsdall <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity + 
                logNum + DISmal + LDPseats + ldpsenior"

pre.red <- subset(elec.dat, 
                    elec.dat$last==1 &
                    elec.dat$logFDISngaidpc > -10000 &
                    elec.dat$tmt_possible==1 &
                    elec.dat$LDPseats!=0)

pre.red.pdf <- pdata.frame(pre.red, index = c("district_reform2", "year"))

pre.red.pdf$ldpsenior <- as.factor(pre.red.pdf$ldpsenior)

mod2 <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                   pre.red.pdf, model = "within", effect = "time",
                   cluster = "district_reform2")
summary(mod2)

prefcap <- read.csv("pref_capitals.csv")
pre.red.pdfn <- subset(pre.red.pdf, !pre.red.pdf$hor_electoral_district %in% prefcap$kucode)
length(unique(pre.red.pdfn$district_year))

mod2nop <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                    pre.red.pdfn, model = "within", effect = "time",
                    cluster = "district_reform2")
summary(mod2nop)



# --------------------------------------------------------------
# Replace LDPseats with DisLDPVS2

controlsdall <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity + 
                logNum + DISmal + DisLDPVS2 + ldpsenior"

pre.red <- subset(elec.dat, 
                    elec.dat$last==1 &
                    elec.dat$logFDISngaidpc > -10000 &
                    elec.dat$tmt_possible==1 &
                    elec.dat$LDPseats!=0)

pre.red.pdf <- pdata.frame(pre.red, index = c("district_reform2", "year"))

pre.red.pdf$ldpsenior <- as.factor(pre.red.pdf$ldpsenior)

mod3 <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                   pre.red.pdf, model = "within", effect = "time",
                   cluster = "district_reform2")
summary(mod3)

prefcap <- read.csv("pref_capitals.csv")
pre.red.pdfn <- subset(pre.red.pdf, !pre.red.pdf$hor_electoral_district %in% prefcap$kucode)
length(unique(pre.red.pdfn$district_year))

mod3nop <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                    pre.red.pdfn, model = "within", effect = "time",
                    cluster = "district_reform2")
summary(mod3nop)


# ----------------------------
# print table

stargazer(mod2, mod3, mod2nop, mod3nop,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)








# ----------------------------------------
# Table A.10
#
# \ref{bet_dist_T1_alt2}
# ----------------------------------------

controlsdall <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity + 
                logNum + DISmal"

pre.red <- subset(elec.dat, 
                    elec.dat$last==1 &
                    elec.dat$logFDISngaidpc > -10000 &
                    elec.dat$tmt_possible==1 &
                    elec.dat$LDPseats!=0)

pre.red.pdf <- pdata.frame(pre.red, index = c("district_reform2", "year"))

mod4 <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                   pre.red.pdf, model = "within", effect = "time",
                   cluster = "district_reform2")
summary(mod4)

prefcap <- read.csv("pref_capitals.csv")
pre.red.pdfn <- subset(pre.red.pdf, !pre.red.pdf$hor_electoral_district %in% prefcap$kucode)
length(unique(pre.red.pdfn$district_year))

mod4nop <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                    pre.red.pdfn, model = "within", effect = "time",
                    cluster = "district_reform2")
summary(mod4nop)


# ----------------------------
# print table

stargazer(mod4, mod4nop,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)












# -------------------------------------------
# TABLE 7.2
#
# \ref{bet_dist_T2}
# -------------------------------------------

controlsd <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity"

controlsdall <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity + 
                logNum + DISmal + DisWinLDPVS2 + ldpsenior"

pre.red2 <- subset(elec.dat, elec.dat$year <= 1993 & 
                     elec.dat$last==1 & 
                     elec.dat$logFDISngaidpc > -10000 &
                     elec.dat$tmt_possible==1 &
                     elec.dat$LDPseats!=0)

pre.red2$ldpsenior <- as.factor(pre.red2$ldpsenior)

pre.red.pdf2 <- pdata.frame(pre.red2, index = c("district_reform2", "year"))

mod5 <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                   pre.red.pdf2, model = "within", effect = "time",
                   cluster = "district_reform2")
summary(mod5)

mod6 <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                   pre.red.pdf2, model = "within", effect = "twoways",
                   cluster = "district_reform2")
summary(mod6)

# Remove prefecture capitals from main data, then create pdf:
pre.red2n <- subset(pre.red2, !pre.red2$hor_electoral_district %in% prefcap$kucode)
pre.red2n$ldpsenior <- as.factor(pre.red2n$ldpsenior)
pre.red.pdf2n <- pdata.frame(pre.red2n, index = c("district_reform2", "year"))
length(unique(pre.red.pdf2n$district_year))

mod5nop <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                    pre.red.pdf2n, model = "within", effect = "time",
                    cluster = "district_reform2")
summary(mod5nop)

mod6nop <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                    pre.red.pdf2n, model = "within", effect = "twoways",
                    cluster = "district_reform2")
summary(mod6nop)




# ----------------------------
# Print table

stargazer(mod5, mod6, mod5nop, mod6nop,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)






# --------------------------------
# Substantive effects of Model 1 (mod5)

newdat <- mod5$model

# which coef is HI?
mod5$coefficients

# effect of a 1 SD increase in HI:
mod5$coefficients[1]*sd(na.omit(newdat$HI))

# Exponentiate (as log DV):
exp(mod5$coefficients[1]*sd(na.omit(newdat$HI)))

# Express as percentage (as log DV):
(exp(mod5$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100

# sample mean:
mean(na.omit(newdat$logFDISngaidpc))

# Exponentiate this:
exp(mean(na.omit(newdat$logFDISngaidpc)))

# Multiply by units for raw DV (hyaku man en)
exp(mean(na.omit(newdat$logFDISngaidpc)))*1000000
sample_mean <- exp(mean(na.omit(newdat$logFDISngaidpc)))*1000000

# Increase over sample mean:
(((exp(mod5$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100)/100)*sample_mean
increase_over_sm <- (((exp(mod5$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100)/100)*sample_mean
increase_over_sm

exp(mean(newdat$logDISpop))

# How much extra money district gets 
exp(mean(newdat$logDISpop))*increase_over_sm












#  -------------------------------------
# Table A.11
# 
# Pre-ER period only: with DisWinLDPVS2 replaced with LDPseats
# \label{bet_dist_T2_alt1}
#
#--------------------------------------

controlsd <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity"

controlsdall <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity + 
                logNum + DISmal + LDPseats + ldpsenior"

pre.red2 <- subset(elec.dat, elec.dat$year <= 1993 & 
                     elec.dat$last==1 & 
                     elec.dat$logFDISngaidpc > -10000 &
                     elec.dat$tmt_possible==1 &
                     elec.dat$LDPseats!=0)

pre.red2$ldpsenior <- as.factor(pre.red2$ldpsenior)

pre.red.pdf2 <- pdata.frame(pre.red2, index = c("district_reform2", "year"))

mod7 <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                  pre.red.pdf2, model = "within", effect = "time",
                  cluster = "district_reform2")
summary(mod7)

mod8 <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                  pre.red.pdf2, model = "within", effect = "twoways",
                  cluster = "district_reform2")
summary(mod8)

# Remove prefecture capitals from main data, then create pdf:

pre.red2n <- subset(pre.red2, !pre.red2$hor_electoral_district %in% prefcap$kucode)
pre.red2n$ldpsenior <- as.factor(pre.red2n$ldpsenior)
pre.red.pdf2n <- pdata.frame(pre.red2n, index = c("district_reform2", "year"))
length(unique(pre.red.pdf2n$district_year))

mod7nop <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                     pre.red.pdf2n, model = "within", effect = "time",
                     cluster = "district_reform2")
summary(mod7nop)

mod8nop <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                     pre.red.pdf2n, model = "within", effect = "twoways",
                     cluster = "district_reform2")
summary(mod8nop)



# ----------------------------
# print table

stargazer(mod7, mod8, mod7nop, mod8nop,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)





#  -------------------------------------
# Table A.12
# 
# Pre-ER period only: with DisWinLDPVS2 replaced with DISLDPVS2
# \label{bet_dist_T2_alt2}
#
#--------------------------------------

controlsdall <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity + 
                logNum + DISmal + DisLDPVS2 + ldpsenior"

pre.red2 <- subset(elec.dat, elec.dat$year <= 1993 & 
                     elec.dat$last==1 & 
                     elec.dat$logFDISngaidpc > -10000 &
                     elec.dat$tmt_possible==1 &
                     elec.dat$LDPseats!=0)

pre.red2$ldpsenior <- as.factor(pre.red2$ldpsenior)

pre.red.pdf2 <- pdata.frame(pre.red2, index = c("district_reform2", "year"))

mod9 <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                  pre.red.pdf2, model = "within", effect = "time",
                  cluster = "district_reform2")
summary(mod9)

mod10 <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                  pre.red.pdf2, model = "within", effect = "twoways",
                  cluster = "district_reform2")
summary(mod10)

# Remove prefecture capitals from main data, then create pdf:

pre.red2n <- subset(pre.red2, !pre.red2$hor_electoral_district %in% prefcap$kucode)
pre.red2n$ldpsenior <- as.factor(pre.red2n$ldpsenior)
pre.red.pdf2n <- pdata.frame(pre.red2n, index = c("district_reform2", "year"))
length(unique(pre.red.pdf2n$district_year))

mod9nop <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                     pre.red.pdf2n, model = "within", effect = "time",
                     cluster = "district_reform2")
summary(mod9nop)

mod10nop <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                     pre.red.pdf2n, model = "within", effect = "twoways",
                     cluster = "district_reform2")
summary(mod10nop)




# ----------------------------
# Print table

stargazer(mod9, mod10, mod9nop, mod10nop,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)







# ------------------------------------------------------
# TABLE 7.3 
#
# \label{bet_dist_T2post}
# -------------------------------------------------------

controlsdall <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity + 
                logNum + DISmal + DisWinLDPVS2 + ldpsenior"

pre.red2 <- subset(elec.dat, elec.dat$year > 1993 & 
                     elec.dat$last==1 & 
                     elec.dat$logFDISngaidpc > -10000 &
                     elec.dat$tmt_possible==1 & 
                     elec.dat$LDPseats!=0)

pre.red2$ldpsenior <- as.factor(pre.red2$ldpsenior)

pre.red.pdf2 <- pdata.frame(pre.red2, index = c("district_reform2", "year"))

mod11 <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                   pre.red.pdf2, model = "within", effect = "time",
                   cluster = "district_reform2")
summary(mod11)

# Remove prefecture capitals from maindata, then create pdf:

pre.red2n <- subset(pre.red2, !pre.red2$hor_electoral_district %in% prefcap$kucode)
pre.red2n$ldpsenior <- as.factor(pre.red2n$ldpsenior)
pre.red.pdf2n <- pdata.frame(pre.red2n, index = c("district_reform2", "year"))
length(unique(pre.red.pdf2n$district_year))

mod11nop <- stata_plm(paste("logFDISngaidpc ~ HI + ", controlsdall), 
                    pre.red.pdf2n, model = "within", effect = "time",
                    cluster = "district_reform2")
summary(mod11nop)




# ----------------------------
# print table

stargazer(mod11, mod11nop,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)

# --------------------------------------
# Substantive effects of Model 1 (mod11)

newdat <- mod11$model

# which coef is HI?
mod11$coefficients

# effect of a 1 SD increase in HI:
mod11$coefficients[1]*sd(na.omit(newdat$HI))

# Exponentiate (as log DV):
exp(mod11$coefficients[1]*sd(na.omit(newdat$HI)))

# Express as percentage (as log DV):
(exp(mod11$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100

# sample mean:
mean(na.omit(newdat$logFDISngaidpc))

# Exponentiate this:
exp(mean(na.omit(newdat$logFDISngaidpc)))

# Multiply by units for raw DV (hyaku man en)
exp(mean(na.omit(newdat$logFDISngaidpc)))*1000000
sample_mean <- exp(mean(na.omit(newdat$logFDISngaidpc)))*1000000

# Increase over sample mean:
(((exp(mod11$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100)/100)*sample_mean

increase_over_sm <- (((exp(mod11$coefficients[1]*sd(na.omit(newdat$HI)))-1)*100)/100)*sample_mean
increase_over_sm

exp(mean(newdat$logDISpop))

# How much extra money muni gets 
exp(mean(newdat$logDISpop))*increase_over_sm











# ------------------------------------------------
# TABLE 7.4 
#
# \ref{bet_dist_T6}
#
# ------------------------------------------------

# First, we create the first-differenced municipality-level data set we need,
# limiting it to observations until 1996:

pre.red <- subset(elec.dat, elec.dat$year < 2000 & 
                  is.na(elec.dat$muncode_num) == F &
                  elec.dat$tmt_possible==1 & 
                  elec.dat$LDPseats!=0)

pre.red <- pre.red %>% mutate(cat_year = as.numeric(as.factor(year)))

pre.red.pdf <- pdata.frame(pre.red, index = c("muncode_num", "cat_year"))

pre.red.pdf2 <- pre.red.pdf[,c("F1logngaid_pc", "sumLDP_VshareVP", "bestLDP_VshareVP", 
                               "mun_ceif", "needy_pc", "primary_pc", "lnpop", "logincome_pc","mun_population_density", 
                               "DISceif", "DISneedy", "DISprimary", "logDISpop", "logDISincomepc", "DISdensity", 
                               "ncands_electoral_district",
                               "HI", "logNum", "DISmal", "ldpsenior", "LDPseats", "numLDPcand",   
                               "totseat_in_electoral_district",
                               "muncode_num","cat_year", "district_year")]


# Take first differences of the variables in the first 23 columns:
diff_pdat <- pre.red.pdf2
for (c in 1:23) {
  diff_pdat[,c] <- diff(diff_pdat[,c])
}

# Add lagged versions of the variables we need (attach the prefix "lag".)  
# Note that the data is elections, so these variables will be from the 
# previous *election", not the previous "year".

# Also, add values of variables at time t (attach the prefix curr, so non-first
# differenced variables). Can add them directly from pre.red.pdf as observations
# are in the same order.

diff_pdat <- diff_pdat %>% mutate(
  
  # lagged versions:
  
  lag.year = NA,
  lag.sumLDP_VshareVP = NA,
  lag.bestLDP_VshareVP = NA,
  lag.district_year = NA,
  lag.F1logngaid_pc = NA,
  lag.logngaid_pc = NA,
  lag.HI = NA,
  
  # current versions of variables:
  
  curr.year = pre.red.pdf$year,
  curr.Llogngaid_pc = pre.red.pdf$Llogngaid_pc,
  curr.F1logngaid_pc = pre.red.pdf$F1logngaid_pc,
  curr.logngaid_pc = pre.red.pdf$logngaid_pc,
  curr.sumLDP_VshareVP = pre.red.pdf$sumLDP_VshareVP,
  curr.bestLDP_VshareVP = pre.red.pdf$bestLDP_VshareVP,
  curr.mun_ceif = pre.red.pdf$mun_ceif,
  curr.needy_pc = pre.red.pdf$needy_pc,
  curr.primary_pc = pre.red.pdf$primary_pc,
  curr.lnpop = pre.red.pdf$lnpop,
  curr.logincome_pc = pre.red.pdf$logincome_pc,
  curr.mun_population_density = pre.red.pdf$mun_population_density,
  curr.DISceif =  pre.red.pdf$DISceif,
  curr.DISneedy = pre.red.pdf$DISneedy,
  curr.DISprimary = pre.red.pdf$DISprimary,
  curr.logDISpop = pre.red.pdf$logDISpop,
  curr.logDISincomepc = pre.red.pdf$logDISincomepc,
  curr.DISdensity = pre.red.pdf$DISdensity,
  curr.logNum = pre.red.pdf$logNum,
  curr.DISmal = pre.red.pdf$DISmal,
  curr.LDPseats = pre.red.pdf$LDPseats,
  curr.ldpsenior = pre.red.pdf$ldpsenior,
  curr.numLDPcand = pre.red.pdf$numLDPcand,
  curr.ncands_electoral_district = pre.red.pdf$ncands_electoral_district,
  curr.HI = pre.red.pdf$HI
  
)

for (var in c("lag.year", "lag.sumLDP_VshareVP", "lag.bestLDP_VshareVP", 
              "lag.district_year", "lag.F1logngaid_pc", "lag.logngaid_pc", "lag.HI")) {
  orig_var <- sub("lag.", "", var)
  diff_pdat[,var] <- lag(pre.red.pdf[,orig_var])
}

# Many checks of diff_pdat to ensure it has been made correctly
# e.g. 
# check <- subset(diff_pdat, diff_pdat$muncode_num=="1202.1")
# check[, c("curr.year", "lag.F1logngaid_pc", "curr.F1logngaid_pc", "F1logngaid_pc")]





# ---------------------------------------------
# Next, subset this first-differenced data set to 1996 only:

diff_dat96 <- subset(diff_pdat, diff_pdat$curr.year == 1996)
mean(na.omit(diff_dat96$HI))
summary(na.omit(diff_dat96$HI))

nrow(diff_dat96)

# Some municipalities have identical delta HI scores.  This is because districts under
# the new system are subsets of districts under the old.  
# So if Municipality A and B were in the same district in 1993 (with other municipalities) 
# and both of them moved to a new district in 1996, they wuld both have 
# identical delta HI scores. 



# -----------------------------------------------------
# Run models:

controls <- "mun_ceif + needy_pc + primary_pc + lnpop + logincome_pc + mun_population_density"

# Can use diff_dat96, but need to specify district_year FE separately.
class(diff_dat96$district_year)

mod12 <- lm(paste("F1logngaid_pc ~ district_year + HI + curr.sumLDP_VshareVP +
                  sumLDP_VshareVP + curr.logngaid_pc + ",
                 controls), diff_dat96)
summary(mod12)

mod13 <- lm(paste("F1logngaid_pc ~ district_year + HI + curr.sumLDP_VshareVP +
                  logNum + DISmal + ldpsenior + LDPseats + totseat_in_electoral_district +
                  sumLDP_VshareVP + curr.logngaid_pc + ",
                 controls), diff_dat96)
summary(mod13)


# -----------------------------------------
# print table
stargazer(mod12, mod13,
          no.space = T,
          omit.stat = c("f"),
          omit="district",
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)
















# ----------------------------------------
# TABLE 7.5 
#
# \ref{bet_dist_T3}
# ----------------------------------------

controlsd <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity"

controlsdall <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity + 
                totseat_in_electoral_district + logNum + DISmal + ldpsenior + numLDPcand"





# -------------------------------------------
# Full sample, 1980-2014:

pre.red <- subset(elec.dat, 
                    elec.dat$last==1 & 
                    elec.dat$logFDISngaidpc > -10000 &
                    elec.dat$tmt_possible==1)

pre.red$ldpsenior <- as.factor(pre.red$ldpsenior)

pre.red.pdf <- pdata.frame(pre.red, index = c("district_reform2", "year"))

mod20 <- stata_plm(paste("DisWinLDPVS2 ~ HI + ", controlsdall), 
                   pre.red.pdf, model = "within", effect = "time", 
                   cluster = "district_reform2")
summary(mod20)

mod21 <- stata_plm(paste("DisLDPVS2 ~ HI + ", controlsdall), 
                   pre.red.pdf, model = "within", effect = "time", 
                   cluster = "district_reform2")
summary(mod21)

mod22 <- stata_plm(paste("LDPseats ~ HI + ", controlsdall), 
                   pre.red.pdf, model = "within", effect = "time", 
                   cluster = "district_reform2")
summary(mod22)






# ---------------------------------------
# print table
stargazer(mod20, mod21, mod22,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)





# ---------------------------------------
# Substantive effects from mod20

newdat <- mod20$model
# which coef is HI?
mod20$coefficients

# effect of a 1 SD increase in HI:
mod20$coefficients[1]*sd(na.omit(newdat$HI))
# in percentage
(mod20$coefficients[1]*sd(na.omit(newdat$HI)))*100

# mean in percentage
(mean(na.omit(newdat$DisWinLDPVS2)))*100




# ---------------------------------------
# Substantive effects from mod21

newdat <- mod21$model
# which coef is HI?
mod21$coefficients

# effect of a 1 SD increase in HI:
mod21$coefficients[1]*sd(na.omit(newdat$HI))
# in percentage
(mod21$coefficients[1]*sd(na.omit(newdat$HI)))*100

# mean in percentage
(mean(na.omit(newdat$DisLDPVS2)))*100


# ---------------------------------------
# Substantive effects from mod22

newdat <- mod22$model
# which coef is HI?
mod22$coefficients

# effect of a 1 SD increase in HI:
mod22$coefficients[1]*sd(na.omit(newdat$HI))
# in percentage
(mod22$coefficients[1]*sd(na.omit(newdat$HI)))*100

# mean in percentage
(mean(na.omit(newdat$LDPseats)))*100









# -----------------------------------------
# TABLE A.13
#
# bet_dist_T3pre
# -----------------------------------------

controlsd <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity"

controlsdall <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity + 
                totseat_in_electoral_district + logNum + DISmal + ldpsenior + numLDPcand"

# -------------------------------------------

pre.red2 <- subset(elec.dat, elec.dat$year <= 1993 & 
                     elec.dat$last==1 & 
                     elec.dat$logFDISngaidpc > -10000 &
                     elec.dat$tmt_possible==1)

pre.red2$ldpsenior <- as.factor(pre.red2$ldpsenior)

pre.red.pdf2 <- pdata.frame(pre.red2, index = c("district_reform2", "year"))



# ----------------------------------------

mod23 <- stata_plm(paste("DisWinLDPVS2 ~ HI + ", controlsdall), 
                   pre.red.pdf2, model = "within", effect = "time", 
                   cluster = "district_reform2")
summary(mod23)

mod24 <- stata_plm(paste("DisLDPVS2 ~ HI + ", controlsdall), 
                   pre.red.pdf2, model = "within", effect = "time", 
                   cluster = "district_reform2")
summary(mod24)

mod25 <- stata_plm(paste("LDPseats ~ HI + ", controlsdall), 
                   pre.red.pdf2, model = "within", effect = "time", 
                   cluster = "district_reform2")
summary(mod25)

# 
stargazer(mod23, mod24, mod25,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)




# -----------------------------------------
# TABLE A.14
#
# bet_dist_T3post
# -----------------------------------------

controlsd <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity"

controlsdall <- "DISceif + DISneedy + DISprimary + logDISpop + logDISincomepc + DISdensity + 
                logNum + DISmal + ldpsenior"

# -------------------------------------------

pre.red3 <- subset(elec.dat, elec.dat$year > 1993 & 
                     elec.dat$last==1 & 
                     elec.dat$logFDISngaidpc > -10000 &
                   elec.dat$tmt_possible==1)


pre.red3$ldpsenior <- as.factor(pre.red3$ldpsenior)

pre.red.pdf3 <- pdata.frame(pre.red3, index = c("district_reform2", "year"))


# ----------------------------------------

mod26 <- stata_plm(paste("DisWinLDPVS2 ~ HI + ", controlsdall), 
                   pre.red.pdf3, model = "within", effect = "time", 
                   cluster = "district_reform2")
summary(mod26)

mod27 <- stata_plm(paste("DisLDPVS2 ~ HI + ", controlsdall), 
                   pre.red.pdf3, model = "within", effect = "time", 
                   cluster = "district_reform2")
summary(mod27)

mod28 <- stata_plm(paste("LDPseats ~ HI + ", controlsdall), 
                   pre.red.pdf3, model = "within", effect = "time", 
                   cluster = "district_reform2")
summary(mod28)



# ----------------------------------------
# Print table
stargazer(mod26, mod27, mod28,
          no.space = T,
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001)
          #out = "models1.txt"
)










# -----------------------------------------
# FIGURE 7.2.
#
# \label{boxplotsHI}
# ----------------------------------------

# Pre-electoral reform:

pre.red2 <- subset(elec.dat, elec.dat$year <= 1993 & 
                     elec.dat$last==1 & 
                     elec.dat$logFDISngaidpc > -10000 &
                     elec.dat$tmt_possible==1)
pre.red.pdf2 <- pdata.frame(pre.red2, index = c("district_reform2", "year"))

mean(pre.red.pdf2$HI)
summary(pre.red.pdf2$HI)
length(unique(pre.red.pdf2$district_year))

# Post-electoral reform
pre.red3 <- subset(elec.dat, elec.dat$year > 1993 & elec.dat$last==1 & elec.dat$logFDISngaidpc > -10000
                   & elec.dat$tmt_possible==1)
pre.red.pdf3 <- pdata.frame(pre.red3, index = c("district_reform2", "year"))

mean(pre.red.pdf3$HI)
length(unique(pre.red.pdf3$district_year))
summary(pre.red.pdf3$HI)

ks.test(pre.red.pdf2$HI, pre.red.pdf3$HI)

lmts <- c(0,1)
par(mfrow = c(1, 2))
boxplot(pre.red.pdf2$HI, ylim=lmts, xlab = "Pre-Electoral Reform Districts", ylab = "District-Level Asymmetry",  cex.lab=2)
boxplot(pre.red.pdf3$HI, ylim=lmts, xlab = "Post-Electoral Reform Districts", ylab = "District-Level Asymmetry",  cex.lab=2)



